% 

%%%%%%% small_network_mcmc .m %%%%%%% 

% 

% 

% This program utilizes a Markov Chain Monte Carlo 

% method for the prediction of the most likely configuration 

% or state of a regulatory network. 

% 

% 

% smg - Last Modified: 8/7/00 

frame_count = 0; 
rejected = 0; 
edge_numfoer (1,1) = 0 ; 

% Ask user for set-up information 

configuration = input ('Starting from scratch (0) or from an initial Adjacency mtx (1) : 

if configuration == 1 

A__dir = input ('Enter path and filename for Adjacency matrix:'); 
else 

Ls initial_edges = input ( 1 Input initial number of edges: ■ ); 
Sid 

lpiput„dir =. input ('Enter directory for probability matrices: '); 

frterations = input ('Input number of iterations: '); 

burn_in = input ('Enter number of burn- in iterations: '); 

directory = input {'Enter directory of input files in single quotes: « ); 

= input ('Enter maximum number of edges to sample (change) at one time: '); 
jave_interval = input ( ' Enter save interval : ' ) ; 

Z * 

jj^j Load precalculated arrays 
IBk(input_dir) ; 

iload edge_matrix; % used for sampling edges 

ftbad edge_probabilities; 

grobarray = edge_probabilities; % probability of an edge 
Jqflear edge_probabilities; 
load edge_probabilities_zero 

probarray_zero = edge_probabilities_zero; % probability of not having edge 
clear edge_probabilities_zero; 

% Load look-up table for In of factorial 

load lnfac.txt -ascii; 

lnfac = reshape (lnfaC ,10008,1) ; 



% Load data 
cd (directory) ; 
names = dir; 

% get vertices 

for n = 3 : length (names ) ; %3~708 (Curagen) 3-13 (test) 3-641 (DIP) - process each file 

filename = names (n) .name; 
datain = load ( filename ) ; 
protein (n-2) .id = n-2; 
newname = strtok( filename, ' . ' ) ; 
newname = str2num( newname) ; 
protein (n-2) .name = newname; 
protein .domains = datain { 1 : end) ; 

idnamelist (n-2) = newname; 



end 

len = length ( idnamel ist ) ; 



% number of vertices 



edge_freq = sparse (len, len) ; 

% Set up random edges between vertices 
if configuration == 0 
A = sparse (len, len) ; 
for i = l:initial_edges 

i_init = 1 + floor (rand*len) ; 
j_init = 1 + floor (rand*len) ; 
A(i_init , j_init) = 1; 

end 
else 

% must be called Alast - Make more user friendly 
load(A_dir) ; 

A = Alast; % A = Alast; 
clear Alast; 

end 



edgesinit = nnz (A) % number of interactions 

iedgevector = sum (A, 2); % number of outgoing edges for each vertex 

inedgevector = sum (A) ; % number of incoming edges for each vertex 

|pge_number ( 1 , 1 ) = edgesinit; 

^Calculate edge probabilities 

|Uprob_edges is the sum of log(prob) for each existing edge 

S*!prob_edges_zero is the sum of log(prob) for each non-existant edge 

^rob_edges = 0; 

prob_edges_zero = 0; 

for i = l:len 

1 for j = l:len 

P if A(i,j) == 1 

Iv prob_edges = prob_edges + log (probarray (i, j ) ) ; 

U ; else 

r 

y t prob_edges_zero = prob_edges_zero + log(probarray_zero (i , j ) ) ; 

end 

Jr: end 
end 



%%%%%%%%%%%%%%%% Calculate system probability %%%%%%%%%%%%%%%% 
LOGPTRANS = prob_edges; 
LOGPTRANSzero = prob_edges_zero; 



%%%%%%%%%%%%%%%%% incoming edge probability - Multinomial 

% Determine the number of vertices with 0,1,.. incoming edges 

invertexedgevect = zeros ( 1 , max ( inedgevector ) +1 ) ; 

fori = 1: length (inedgevector) % i = 1 is vert w/ zero incoming edges 

invertexedgevect (inedgevector (i)+l) = invertexedgevect (inedgevector (i) +1) + 

end 

% Multinomial dist. 

MIN = sum ( invertexedgevect ) ; % Total number of vertices 

LOGPIN = 0; 
LOGDENIN = 0; 

for i = 1: length (invertexedgevect) 

LOGPIN = LOGPIN + invertexedgevect ( i ) *log (probinedge ( i-1 )) ; 
LOGDENIN = LOGDENIN + Infac (invertexedgevect (i) + 1); 

end 

LOGNUMIN = lnfac(MIN+l) ; 



%%%%%%%%%%%%%%%%%%%% Outgoing edge probability - Multinomial 
% Determine the number of vertices with 0,1,.. outgoing edges 
vertexedgevect = zeros (l / max( edgevec tor) +1) ; 

for i = 1: length (edgevec tor) % i = 1 is vertex w/ 0 outgoing edges 

vertexedgevect ( edgevect or (i)+l) = vertexedgevect (edgevec tor (i) +1 ) + 1; 

end 

% Multinomial dist. 

M = sum (vertexedgevect ) ; % Total number of vertices (= M1N) 

LOGPOUT = 0; 
LOGDEN = 0; 

for i = 1 : length (vertexedgevect) 

LOGPOUT = LOGPOUT + vertexedgevect ( i ) *log (edgedist ( i-1 )) ; 
LOGDEN = LOGDEN + lnfac (vertexedgevect ( i ) + 1); 

end 

LOGNUM = lnfac{M+l); 

LOGDEN; 

LOGPOUT; 



[Mk%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

f%; Likelihood of system in initial configuration 

ppGSYSX = ( LOGNUM- LOGDEN ) +LOGPOUT+ ( LOGNUMIN- LOGDENIN ) +LOGPIN+LOGPTRANS+LOGPTRANSzero ; 
rfprintf( 'Initial Probability = %5 . 4E\n\n« , LOGSYSX) 

y 

Save initial configuration - for reference 
JppGSYSXorig = LOGSYSX; 
s&prig = A; 

e^dgevectororig = edgevector; 
Jinedgevectororig = inedgevector ; 
*v4rtexedgevectorig = vertexedgevect; 
llnvertexedgevectorig = invertexedgevect; 

M; 

4 : Add or remove edge at random & recompute likelihood 

flag =1; % for testing purposes 

if flag == 1; 

prob_edgescomp = prob_edges; 
prob_edges_zerocomp = prob_edges_zero; 
edgevec torcomp = edgevector; 
inedgevectorcomp = inedgevector; 
Acomp = A; 



% set up matrix for observing frequency of edge sampling 
edge_freq - zeros ( len, len) ; 



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
tic; 

for x = 1: iterations 

% choose to add or remove 
if rand < 0.5 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% add edge 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 



% create AO matrix 
AO = (A==0) ; 



f 

J 

i 

% select edge from those that are currently = 0 
[edges , q_x2y, q_j^2x] = edge_selection (ks, AO, Infac) ; 

[edge__rows, edge__cols] = size(edges); 
% update parameters 
for t = 1 : edge_rows 

A(edges (t, 1) , edges (t, 2) ) = 1; 

prob_edges = prob_edges + log (probarray (edges (t, 1) , edges (t, 2) )) ; 
prob_edges_zero = prob_edges_zero - log (probarray_zero { edges ( t , 1 ) , edges ( t , 2 ) ) ) 
edgevector (edges <t, 1) ) = edgevector (edges ( t, 1) ) +1; % update edge vectors 

inedgevector (edges (t, 2) ) = inedgevector (edges ( t , 2 ) ) + 1; 
edge_freq (edges (t, 1) ,edges(t,2) ) = edge_freq ( edges (t, 1) , edges (t, 2) ) + 1; 

end 



% flag showing state 
add_edge = 1; 

else 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% remove an edge 

% select preferentially from proteins that should be less well connected 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% select edge to remove 

[edges, q__x2y,q_y2x] = edge_s elect ion (ks , A, lnfac ) ; 
% change edge 

[edge_rows,edge_cols] = size(edges); 

% update parameters 
for t = 1: edge_rows 

A(edges(t,l) , edges (t, 2) ) = 0; 

prob_edges = prob_edges - log (probarray ( edges ( t , 1 ) , edges ( t , 2 ) ) ) ; 
prob_edges_zero = prob_edges_zero + log (probarray_zero (edges ( t , 1 ), edges ( t, 2 )) ) 
edgevector (edges (t, 1) ) = edgevector (edges (t, 1) ) - 1; 
inedgevector ( edges ( t , 2 ) ) = inedgevector ( edges ( t , 2 ) ) - 1 ; 

edge_freq(edges(t,l) ,edges(t,2) ) = edge_freq (edges (t, 1) , edges ( t, 2 ) ) + 1; 

end 



% flag showing state 
add_edge = 0; 

end 



%%%%%%%%%%% 

% Essentially, a repeat of initial system probability calculation 

LOGPTRANSY = prob_edges; % sum of transition probabilities for 

LOGPTRANSzeroY = prob_edges_zero ; % new system (w/ or w/o new edge) 

%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Incoming edge probability 

invertedgenew = zeros (1, max (inedgevector ) +1) ; % # of vert, w/ 0,1,,.. edges 
fori = 1: length (inedgevector) % i = 1 -> vert w/ 0 incoming edges 

invertedgenew ( inedgevector (i)+l) = invertedgenew ( inedgevector (i)+l) + 1; 

end 



MINY = sum (invertedgenew) ; % Calc Multinomial 
LOGPINY = 0; 



LOGDENINY = 0; 

for i = 1: length ( invert edgenew) 

LOGPINY = LOGPINY + invertedgenew (i) *log (probinedge { i-1 ) ) ; 
LOGDENINY = LOGDENINY + lnfac ( invertedgenew ( i ) + 1); 

end 

LOGNUMINY = lnf ac (MINY+1 ) ; 



%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Outgoing edge probability 
vertedgenew = zeros (1, max (edgevec tor) +1) ; 
for i = 1: length (edgevec tor) 

vertedgenew (edgevec tor (i) +1) = vertedgenew ( edgevec t or (i) +1) + 1; 

end 

MY = sum (vertedgenew) ; % Calc Multinomial 

LOGPOUTY = 0; 
LOGDENY = 0; 

for i = 1: length (vertedgenew) 

LOGPOUTY = LOGPOUTY + vertedgenew ( i ) *log ( edgedist ( i-1 )) ; 
LOGDENY = LOGDENY + lnf ac (vertedgenew ( i ) + 1) ; 

end 

LOGNUMY = lnfac(MY+l); 

X - 

fops' 

|y %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% Probability of new configuration 
LOGSYSY = (LOGNUMY- LOGDENY) +LOGPOUTY+ ( LOGNUMINY - 
|OGDENINY ) + LOGP INY+LOGPTRANS Y+LOGPTRANS z er o Y ; 



p { % Check if iterations>burn_in & establish plotting interval based on 

It: % the number of edges existing after burn in 

\V if x == burn_in 

N ! %save_interval = nnz (A) ; 

M; %Astart = A; 

f -i Asave = A; 

n \ saves = 1 ; 

f * SYSVECT ( saves , 1 ) = x; 

SYSVECT (saves, 2) = LOGSYSX; 

edge_number ( 1 , saves ) =nnz (A) ; 

frame_count = 1; 

frame (frame_count) .adj = Asave; 

f rame ( f rame_count ) .edge_num = nnz (A) ; 

end 



% Save configuration after every ' save_interval ' iterations 
if x > burn_in & rem(x, save_interval) == 0 

Asave = Asave + A; 

saves = saves + 1; 

SYSVECT ( saves , 1 ) = x; 

SYSVECT ( saves , 2 ) = LOGSYSX; 

edge_number ( 1 , saves ) = nnz (A) ; 

end 



% Save "frames" for animation of edge probabilities 
if x > burn_in & rem(x, 200*save_interval) == 0 

frame_count = frame_count + 1; 

frame ( f rame_count ) . ad j = Asave ; 

frame (frame_count) .edge_num = nnz (A) ; 

end 



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% probability of accepting edge 

paccept = exp{(LOGSYSY + CL_y2x) - (LOGSYSX + q__x2y) ) ; 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 



% decide whether or not to keep change 



if rand <= paccept 
% keep change 
Acomp = A; 

edgevectorcomp = edgevector; 
inedgevectorcomp = inedgevector; 
prob_edgescomp = prob_edges; 
prob_edges_zerocomp = prob_edges_zero; 

fprintf ( • %5 . 0d\tpaccept=%3 . 2E\tEdges=%4d\nLOGOLD=%5 . 4E\tLOGNEW=%5 . 4E\tDif f =%5 4E\n * 
x,paccept,nnz(A) , LOGSYSX, LOGSYSY, LOGSYSY-LOGSYSX) 

fprintf (' q_x2y= %5 . 4E\tci_y2x= %5 . 4E\n\n' , q_x2y, q_y2x) 

LOGSYSX = LOGSYSY; 



TKSS-' 



% update sampling distributions appropriately when an edge is added 



% or removed 



else 

% proposed addition or removal of edge is not accepted 
.rejected = rejected + 1; 

ff\ A = Acomp; % reset values to that of the previous iteration 

yj edgevector = edgevectorcomp; 

y inedgevector = inedgevectorcomp; 

JU prob_edges = prob_edgescomp; 

S B ! prob_edges_zero = prob_edges_zerocomp; 

IV end 

jbnd 

fend 

jboc; 



%save d:\apps\matlabrll\work\dipdata\Astart.mat Astart; 
%save d:\apps\matlabrll\work\dipdata\Asave.mat Asave; 



r e j ec t ed / i t er a t i ons 
nnz (A) 
%f igure; 

%gplot ( Abes t , nodeloc , ' -o ' ) ; 

clear Acomp; 
clear AO; 



% percent of changes rejected 



% Plot system probability at each ■ save_interval ' 



figure; 

plot (SYSVECT( : , 1) , SYSVECT ( : , 2 ) ) ; 
xlabel ( ' Iteration Number ■ ) 
ylabeK 'Log(L) ' ) 
title (['MCMC test - \date]) 

%text(14000,-1608, '1500 edges-start, ### edges-end, s = 0.025, 50% Rejection') 
figure; 

pcolor (Asave. /saves) ; 
shading interp 
colorbar ( ' v* ) 
xlabel ( 'Vertex ' ) 



ylabel ( 'Vertex 1 ) 

title ( 'Fraction of Time Edge is Occupied') 
figure; 

plot (edge_number) 
xlabel ( ■ Save Number ' ) 
ylabel { 'Number of Edges ' ) 

figure; 

pcolor {edge_f req) ; 
shading interp 
colorbar ( ' v' ) 

title ( 'Frequency of Edge Sampling'); 



7Z 



f. : 



function [out , q_x2y, q^2x] = edge_selection (ks , A, lnf ac) 
% 

% out = edge__selection(ks, A, Infac) 

% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix, 
% 'ks* is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len* len; 

% find all l's in A 

[source, target] = find(A==l) ; 

% number of interactions available for sampling 
edges_avail = length (source) / 

jfci number of edges to be sampled 

^ges_sampled = 1 + f loor (rand*ks) ; % rand* edge s_avail 

N 

IjMmake sure you don't sample from more than available 
Jfk edges_sampled > edges_avail 

y\ edges__sampled = edges_avail; %useful only for ks case 

lend 

n» 

MM* 

V mix edges to be sampled 
? <lnoose_from_this = randperm(edges_avail) ; 

pi: 

V 

jtg, Sample edges from 1 to edges_sampled 

gut = zeros (edges_sampled, 2 ) ; %initialize out 

But ( : , 1) = source (choose_f rom__this (1 : edges_sampled) ) ; 
out ( : , 2 ) = target (choose_from_this (1 : edges_sampled) ) ; 



% Calculate the proposal distribution from new to old and old to new 
qx = 0; 

qy = 0; 

opp_edges = total_edges - edges_avail + edge s_s amp led; 

for i = l:edges_sampled 

qx = gx + log( ( edge s_samp led - i + 1 ) / (edges_avail - i + 1) ) ; 
qy = qy + log( ( edge s_s amp led - i + 1) / (opp_edges - i +1) ) ; 

end 

q_x2y = qx; 
q_y2x = qy; 



function [out , q_x2y, q_y2x] = edge_s elections Id <ks, A, lnf ac) 
% 

% out = edge_selection__old(ks, A, lnfac) 
% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix. 
% 'ks' is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len*len; 

% find all l's in A 

[source, target] = find(A==l); 

% number of interactions available for sampling 
edges_avail = length (source) ; 



number of edges to be sampled 
pclges_sampled = 1 + f loor (rand*ks) ; %rand*edges avail 

r- S 

Symake sure you don't sample from more than available 
4!f edges_sampled > edges__avail 

m edges_sampled = edges_avail; %useful only for ks case 

fend 

** 

S; sample interactions available and place x and y coordinates i 
S&t = zeros(edges_sampled,2) ; %initialize out 
fErows, cols] = size(out); 

|*ijiile i <= edges_sampled 

JJi x = 1 + floor (rand* edge s_avail) ; % pick vertex 

*** for n = l:rows 

if intersect ( [source (x) , target (x) ] ,out, 'rows 1 ) 

% sampling previously sampled point 

% try another 

break; 
else 

% previously unsampled - place in out matrix 
out(i,l) = source(x); 
out(i,2) = target (x); 
i = i + 1; 

end 

end 

end 
out; 



% Calculate the proposal distribution from new to old 
qx = 0; 
qy = 0; 

opp_edges = total_edges - edges_avail + edge s_s amp 1 ed ; 

for i = l:edges_sampled 

qx = qx + log{ (edges_sampled - i + 1) / (edges^avail - i + 1)) ; 
qy = qy + log( (edges_sampled - i + 1 ) / ( opp_edges - i +1) ) • 

end 

q_x2y = qx; 
Q_y2x = qy; 



function [out , QL.x2y, q_y2x] = edge_selection (ks, A, lnfac) 
% 

% out = edge_selection(ks, A, lnfac) 

% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix. 

% 'ks* is the maximum number of edges to be sampled. 

% 



len = length (A) ; 
total__edges = len*len; 

% find all l«s in A 

[source, target] = find(A==l); 

% number of interactions available for sampling 
edges__avail = length ( source ) ; 

^number of edges to be sampled 

giges_sampled = 1 + f loor (rand*ks) ; %rand*edges_avail 

V i 

' •7,,. ! 

IMmake sure you don't sample from more than available 
|t edges_sampled > edges_avail 

Oft edges^sampled = edges_avail ; %useful only for ks case 

£nd 

55 

%*;mix edges to be sampled 
B&oose_from_J:his = randperm (edges_avail) ; 

|y ; Sample edges from 1 to edges_sampled 

pit = zeros (edges_sampled,2) ; %initialize out 

£ 5 I 

tmt ( : , 1 ) = source { choose_f rom_this ( 1 : edges_sampled) ) ; 
out ( : , 2) ss target (choose_f rom_this (1 : edges_sampled) ) ; 



% Calculate the proposal distribution from new to old and old to 
qx = 0; 
qy = 0; 

opp_edges = total_edges - edges_avail + edges_sampled; 

for i = 1 :edges_sampled 

qx = qx + log( (edges_sampled - i + 1 ) / (edges___avail - i + 1)); 
qy = qy + log ( (edges^sampled - i + 1 ) / ( opp_edges - i +1)); 

end 

q_x2y = qx; 
qjr2x = qy; 



I 

function [out , g_x2y / g.y2x1 = edge_selection_old (ks , A, lnf ac) 
% 

% out = edge_selection_old(ks,A, lnfac) 

% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix. 
% 'ks' is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len*len; 

% find all l's in A 

[source, target] = f ind(A==l) ; 

% number of interactions available for sampling 
edges_avail = length (source) ; 

¥ : number of edges to be sampled 

|ages_sampled = 1 + f loor (rand*ks ) ; %rand*edges_avail 

5— — i 

%.jmake sure you don't sample from more than available 
33 edges_sampled > edges_avail 

% edges_sampled = edges_avail; %useful only for ks case 

Mnd 

x a ; 

|j sample interactions available and place x and y coordinates in "out" 
ljut = zeros(edges_sampled,2) ; %initialize out 
■fjrows, cols] = size ( out ); 

r= 1; 

while i <= edges_sampled 

CI x = 1 + floor (r and* edges_a vail) ; % pick vertex 

RU for n = l:rows 

v — 

if intersect ( [source (x) , target (x) ] , out, 'rows' ) 

% sampling previously sampled point 

% try another 

break; 
else 

% previously unsampled - place in out matrix 
out(i,l) = source(x); 
out(i,2) = target (x); 
i = i + 1; 

end 

end 

end 
out; 



% Calculate the proposal distribution from new to old 
qx = 0; 
qy = 0; 

opp_edges = total_edges - edges_avail + edges__sampled; 

for i = 1 : edges_sampled 

qx = qx + log( (edges_sampled - i + 1) / (edges_avail - i + 1) ) ; 
qy = qy + log ( (edge s_s amp led - i + 1) / (opp_edges - i +1)); 

end 

q_x2y = qx; 
a v2x = qy; 



r. 
f 

function prob = edgedist (numedges) 
% 

% This function takes as input the number of outgoing 
% edges from a vertex and returns the probability of 
% this as "prob" 
% 

% Based on fits to experimental data 
% 

% smg - Last Modified: 7/22/00 
% 

if numedges == 0 

prob = 0.489751; 
else 

prob = 0.304647*numedges. /v (-1.9691) ; 
end 



IP 






function y = probinedge (numedges) 

% 

% 

% 

% 

% 



if numedges == 0 
y = 0.306735; 
else 

y = 0.556352*numedges.^ (-2.8037) ; 
end 
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